# Copyright (c) Facebook, Inc. and its affiliates.
# Modified by Bowen Cheng from: https://github.com/facebookresearch/detr/blob/master/models/position_encoding.py
"""
Various positional encodings for the transformer.
"""
import math

import torch
from torch import nn


class PositionEmbeddingPolarSine(nn.Module):
    """
    This is a more standard version of the position embedding, very similar to the one
    used by the Attention is all you need paper, generalized to work on images.
    """

    def __init__(self, num_pos_feats=64, temperature=10000):
        super().__init__()
        self.num_pos_feats = num_pos_feats
        self.temperature = temperature
        self.normalize = True  # normalization is always enabled for this encoding
        self.scale = 2 * math.pi
    def forward(self, x, center, mask=None):
        """
        Args:
            x: feature map of shape (B, C, H, W); only its shape and device
                are read, the values are not used.
            center: per-sample center of shape (B, 2), ordered (y, x) in grid
                coordinates.
            mask: optional bool padding mask of shape (B, H, W), where True
                marks padded pixels.

        Returns:
            Positional encoding of shape (B, 2 * num_pos_feats, H, W).
        """
        if mask is None:
            mask = torch.zeros((x.size(0), x.size(2), x.size(3)), device=x.device, dtype=torch.bool)
        not_mask = ~mask
        # 1-indexed (y, x) grid coordinates of the valid (non-padded) pixels
        y_embed = not_mask.cumsum(1, dtype=torch.float32)
        x_embed = not_mask.cumsum(2, dtype=torch.float32)
        # offsets from the per-sample center, broadcast over the H x W grid
        dy = y_embed - center[:, 0].view(-1, 1, 1)
        dx = x_embed - center[:, 1].view(-1, 1, 1)
        # polar angle in [0, 2*pi); atan2 handles dx == 0 exactly, so the
        # epsilon nudging of the original atan-based formula is not needed,
        # and the 1.5*pi offset keeps the original zero-angle convention
        st = torch.remainder(torch.atan2(-dy, dx) + 1.5 * math.pi, self.scale)
        # radius, normalized by the feature-map diagonal and scaled to [0, 2*pi]
        r = torch.sqrt(dx * dx + dy * dy)
        R = math.sqrt(x.size(2) ** 2 + x.size(3) ** 2)
        y_embed = r / R * self.scale
        x_embed = st

        dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device)
        # paired sin/cos channels share one frequency, as in the original
        # transformer encoding; torch.div avoids the deprecated float floordiv
        dim_t = self.temperature ** (
            2 * torch.div(dim_t, 2, rounding_mode="floor") / self.num_pos_feats
        )

        pos_x = x_embed[:, :, :, None] / dim_t
        pos_y = y_embed[:, :, :, None] / dim_t
        # interleave sin and cos along the channel dimension
        pos_x = torch.stack(
            (pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4
        ).flatten(3)
        pos_y = torch.stack(
            (pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4
        ).flatten(3)
        # concatenate radius (pos_y) and angle (pos_x) channels, then
        # (B, H, W, C) -> (B, C, H, W)
        pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2)
        return pos
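
    # Usage note: DETR-style pipelines typically flatten the returned
    # (B, C, H, W) encoding to a sequence before adding it to the attention
    # inputs, e.g. pos.flatten(2).permute(2, 0, 1) -> (H*W, B, C).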
    
    def __repr__(self, _repr_indent=4):
        head = "Positional encoding " + self.__class__.__name__
        body = [
            "num_pos_feats: {}".format(self.num_pos_feats),
            "temperature: {}".format(self.temperature),
            "normalize: {}".format(self.normalize),
            "scale: {}".format(self.scale),
        ]
        lines = [head] + [" " * _repr_indent + line for line in body]
        return "\n".join(lines)

if __name__ == "__main__":
    # smoke test: a 14x14 feature map with the center at (y=5, x=8)
    pe = PositionEmbeddingPolarSine(num_pos_feats=128)
    x = torch.zeros((1, 256, 14, 14))
    center = torch.tensor([[5.0, 8.0]])
    ret = pe(x, center)
    print(ret.shape)  # expected: torch.Size([1, 256, 14, 14])
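
    # Sanity check: the atan2-based angle used in forward() matches the
    # equivalent branch-based formula atan(-dy / dx) + (1.5*pi if dx > 0
    # else 0.5*pi) whenever dx != 0.
    dy = torch.randn(1000)
    dx = torch.randn(1000)
    dx = torch.where(dx.abs() < 1e-3, torch.full_like(dx, 1e-2), dx)
    a1 = torch.remainder(torch.atan2(-dy, dx) + 1.5 * math.pi, 2 * math.pi)
    a2 = torch.atan(-dy / dx) + torch.where(dx > 0, math.pi * 1.5, math.pi * 0.5)
    assert torch.allclose(a1, a2, atol=1e-5)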
    